package cn.chencaiju.lucene;



import java.io.IOException;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.cn.smart.SmartChineseAnalyzer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;

/**
 * Demo of Lucene's {@link SmartChineseAnalyzer}: tokenizes a mixed
 * Chinese/ASCII string and prints each term with its character offsets.
 */
public class LuceneChineseAnalyzer {
	public static void main(String[] args) throws IOException {
		String text="import java.io.Reader;中国青年报\n夜思|错过北大。。。陈财钜";
		// Both Analyzer and TokenStream are AutoCloseable. The original code
		// never closed the TokenStream at all, and an exception thrown while
		// iterating would have leaked the Analyzer too; try-with-resources
		// guarantees release in every path.
		try (Analyzer analyzer = new SmartChineseAnalyzer();
				TokenStream tokenStream = analyzer.tokenStream("name", text)) {
			// Attribute of each token's start/end character offsets in `text`
			OffsetAttribute offsetAttr = tokenStream.addAttribute(OffsetAttribute.class);
			// Attribute of each token's term text
			CharTermAttribute charTermAttr = tokenStream.addAttribute(CharTermAttribute.class);
			// Required TokenStream workflow: reset() before the first
			// incrementToken(), end() after the last one, then close().
			tokenStream.reset();
			// Iterate over every token produced by the analyzer
			while (tokenStream.incrementToken()) {
				int start = offsetAttr.startOffset();
				int end = offsetAttr.endOffset();
				String term = charTermAttr.toString();

				System.out.printf("%s [%d, %d]\n", term, start, end);
			}
			tokenStream.end();
		}
	}
}
